import json
import math

import pandas
import pywikibot
import scipy.stats
# Transforming QIDs into English labels.
# Handles used for label lookups: the English Wikipedia site and the data
# repository it is linked to.
enwp = pywikibot.Site('en', 'wikipedia')
wikidata = enwp.data_repository()

# Memo of labels already looked up, keyed by QID, so each item is fetched once.
retrieved = {}
def english_label(qid):
    """Return the English label for a Wikidata item id, caching the result.

    Args:
        qid: The item identifier to look up. A float NaN (used as a
            missing-value marker) is accepted and short-circuits.

    Returns:
        ``None`` when *qid* is a float NaN; otherwise the value stored under
        ``['labels']['en']`` in the item's data, or *qid* itself when the
        lookup raises ``KeyError`` (e.g. the item has no English label).

    Side effects:
        Stores every resolved value (including the *qid* fallback) in the
        module-level ``retrieved`` dict so repeated calls skip the fetch.
    """
    # isinstance (rather than an exact type check) also catches float
    # subclasses such as numpy.float64, which is how NaN usually arrives
    # from pandas.
    if isinstance(qid, float) and math.isnan(qid):
        return None

    # First see if we've already resolved this id.
    if qid in retrieved:
        return retrieved[qid]

    try:
        label = pywikibot.ItemPage(wikidata, qid).get()['labels']['en']
    except KeyError:
        # Fall back to the raw id so the caller always gets something to show.
        label = qid

    retrieved[qid] = label
    return label
VERBOSE:pywiki:Starting 1 threads...
# The records used to be loaded with
# pandas.read_csv('helpers/Chi_Squared_Test_Data.csv'); that path is no
# longer used and the JSON shortcut file below replaces it.
# A context manager makes sure the file handle is closed after parsing.
with open('helpers/world_cultures_shortcut.json', 'r') as records_file:
    allrecs = pandas.DataFrame.from_dict(json.load(records_file))

# Observed counts: one row per culture, one column per gender value.
obs = pandas.crosstab(allrecs['culture'], allrecs['gender'])

# Replace the gender column ids with their English labels. A concrete list
# is assigned because map() is a lazy iterator under Python 3.
obs.columns = [english_label(qid) for qid in obs.columns]
VERBOSE:pywiki:Found 1 wikidata:wikidata processes running, including this one.
# Display the observed culture-by-gender contingency table.
obs
transgender female | intersex | fa'afafine | transgender male | male animal | woman | genderqueer | female | male | kathoey | |
---|---|---|---|---|---|---|---|---|---|---|
culture | ||||||||||
africa | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2256 | 13915 | 0 |
catholic european | 7 | 2 | 0 | 1 | 4 | 0 | 0 | 38267 | 262253 | 0 |
confucian | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 5618 | 14534 | 0 |
english-speaking | 38 | 7 | 1 | 9 | 0 | 0 | 4 | 60753 | 223374 | 0 |
islamic | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 4119 | 22693 | 0 |
latin america | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 11555 | 64539 | 0 |
orthodox | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 14178 | 81513 | 0 |
protestant european | 3 | 1 | 0 | 2 | 0 | 0 | 0 | 49801 | 244301 | 0 |
south asia | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 5531 | 23594 | 1 |
# Run scipy's chi-squared contingency test on the observed table; `expected`
# receives the fourth returned value, which is tabulated further down.
# NOTE(review): most cells of the expected table printed below are far
# smaller than 1, so the chi-squared approximation may be unreliable here --
# consider merging the sparse gender columns; confirm before relying on p.
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)
# Show the test statistic, the p-value and the degrees of freedom.
chi2, p, dof
(10430.455963736977, 0.0, 72)
# Display the expected values as a table.
# NOTE(review): the frame is built from `expected` alone, so its rows and
# columns are numbered positionally instead of carrying the culture/gender
# labels of `obs`.
pandas.DataFrame(expected)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1.018735 | 0.141491 | 0.014149 | 0.169789 | 0.070745 | 0.014149 | 0.056596 | 2717.729756 | 13451.770440 | 0.014149 |
1 | 18.932932 | 2.629574 | 0.262957 | 3.155489 | 1.314787 | 0.262957 | 1.051830 | 50508.329383 | 249997.797134 | 0.262957 |
2 | 1.270032 | 0.176393 | 0.017639 | 0.211672 | 0.088197 | 0.017639 | 0.070557 | 3388.128865 | 16770.001365 | 0.017639 |
3 | 17.903047 | 2.486534 | 0.248653 | 2.983841 | 1.243267 | 0.248653 | 0.994614 | 47760.852663 | 236398.790074 | 0.248653 |
4 | 1.689282 | 0.234622 | 0.023462 | 0.281547 | 0.117311 | 0.023462 | 0.093849 | 4506.581127 | 22305.931875 | 0.023462 |
5 | 4.794183 | 0.665859 | 0.066586 | 0.799031 | 0.332929 | 0.066586 | 0.266344 | 12789.682280 | 63304.259617 | 0.066586 |
6 | 6.028496 | 0.837291 | 0.083729 | 1.004749 | 0.418646 | 0.083729 | 0.334916 | 16082.520021 | 79602.604693 | 0.083729 |
7 | 18.528109 | 2.573348 | 0.257335 | 3.088018 | 1.286674 | 0.257335 | 1.029339 | 49428.363307 | 244652.359199 | 0.257335 |
8 | 1.835184 | 0.254887 | 0.025489 | 0.305864 | 0.127443 | 0.025489 | 0.101955 | 4895.812598 | 24232.485603 | 0.025489 |
np.ndarr